Proteins of
Interest
proteins_of_interest <- c(
"PDCD4",
"LGALS1",
"LGALS3",
"LGALSL", #replaced galectin by the gene name
"CCL17",
"BCCIP",
"CDC42",
"CDC37",
"CDC38",
"CCD86",
"LPXN",
"PAX",
"CYTB",
"SWI/SNF",
"NFKB1",
"TNFAIP8",
"ST13",
"NEUA",
"CYC1",
"PDCD5",
"PDCD6",
"PDCD6IP",
"SERPINB6",
"CD63",
"CD70",
"LCA",
"NUDC",
"BAG6",
"BCLAF1",
"JAK",
"STAT6",
"STAT3",
"STAT5A",
"STAT1",
"NUDT5",
"PRCP",
"SMARCC2",
"BANF1",
"BAF", # added baf because BANF1 = BAF is not possible
"MANF",
"NENF",
"DDB1",
"ARMT1",
"FEN1",
"APEX1",
"PAFAH1B1",
"EEF1E1",
"HDGF",
"GRB2",
"MYDGF",
"OGFR",
"CCAR1",
"CCAR2",
"AIFM1",
"API5",
"ACIN1",
"RBM4",
"SDHB",
"PURA",
"SDHA",
"DLST",
"SUCLG1",
"OXCT1",
"OGDH",
"SUCLG2",
"SUCLA2",
"ALDH",
"SOD2",
"MCM2",
"ACY1",
"GATM",
"HEATR",
"NUP210",
"NUP214",
"VDAC1",
"VDAC2",
"VDAC3",
"BCCP",
"HK2",
"HK1",
"PARP1",
"PARP4",
"H2AFX",
"PCYT1A",
"HSPA1B",
"CASP3",
"SLC4A7",
"EPS15L1",
"KCNAB2",
"TNFRSF8",
"CCBL2",
"GAR1"
)
Load Libraries &
Plot function & Colors
Functions
############################################################
# Function: run_deqms_pipeline
# Purpose: Fit limma model, apply DEqMS (spectraCounteBayes),
# and show the variance boxplot.
############################################################
run_deqms_pipeline <- function(protein.matrix, design, pep.count.table,
contrast_string = "classC-classE",
main_title = "Label-free dataset") {
# Step 1: Fit linear model with limma
fit1 <- limma::lmFit(protein.matrix, design = design)
# Step 2: Define contrasts
cont <- limma::makeContrasts(contrasts = contrast_string, levels = design)
fit2 <- limma::contrasts.fit(fit1, contrasts = cont)
# Step 3: Empirical Bayes shrinkage
fit3 <- limma::eBayes(fit2)
fit3$sigma[which(fit3$sigma == 0)] <- 1e-14
# Step 4: Attach peptide counts
fit3$count <- pep.count.table[rownames(fit3$coefficients), "count"]
# Validate peptide count input
if (any(is.na(fit3$count)) || min(fit3$count, na.rm = TRUE) <= 0) {
stop("Error: At least one entry in 'pep.count.table$count' is NA or <= 0. Please verify inputs.")
}
# Step 5: DEqMS modeling
fit4 <- DEqMS::spectraCounteBayes(fit3)
# Step 6: Visualization
DEqMS::VarianceBoxplot(fit4, n = 20,
main = main_title,
xlab = "peptide count + 1")
return(fit4)
}
############################################################
# Function: plot_volcano
# Purpose: Create a volcano plot using log2 fold change and q-values
############################################################
plot_volcano <- function(res, title = "", lfc_limit = NA) {
res.plot <- res %>%
dplyr::filter(!is.na(adj.P.Val)) %>%
dplyr::arrange(adj.P.Val) %>%
dplyr::rowwise() %>%
dplyr::mutate(
threshold = adj.P.Val < 0.05 & abs(logFC) > 1.0,
out_of_bounds = ifelse(is.na(lfc_limit), 0, (abs(logFC) > lfc_limit) * sign(logFC)),
logFC_capped = ifelse(out_of_bounds != 0, lfc_limit * sign(logFC), logFC)
) %>%
dplyr::ungroup()
res.plot %<>%
dplyr::mutate(genelabels = ifelse(threshold, gene, ""))
maxFC <- max(abs(res.plot$logFC))
if (!is.na(lfc_limit) && lfc_limit < maxFC) {
xlim <- lfc_limit
} else {
xlim <- maxFC * 1.04
}
ggplot2::ggplot(res.plot, ggplot2::aes(x = logFC_capped, y = adj.P.Val)) +
ggplot2::geom_point(data = subset(res.plot, out_of_bounds == 0),
ggplot2::aes(colour = threshold), alpha = 0.5) +
ggplot2::geom_point(data = subset(res.plot, out_of_bounds == -1),
ggplot2::aes(colour = threshold), shape = "\u25c4", size = 2) +
ggplot2::geom_point(data = subset(res.plot, out_of_bounds == 1),
ggplot2::aes(colour = threshold), shape = "\u25BA", size = 2) +
ggplot2::geom_hline(yintercept = 0.05, linetype = 2) +
ggplot2::geom_vline(xintercept = c(-1, 1), linetype = 2) +
ggplot2::scale_x_continuous(breaks = scales::pretty_breaks(),
limits = c(-xlim, xlim),
expand = ggplot2::expansion(0.01)) +
ggplot2::scale_y_continuous(trans = c("log10", "reverse"),
breaks = scales::log_breaks(),
labels = scales::scientific) +
ggplot2::ggtitle(title) +
ggplot2::xlab("log2 Fold Change") +
ggplot2::ylab("q-value") +
ggplot2::theme_minimal() +
ggplot2::theme(legend.position = "none")
}
############################################################
# Function: extract_gene_names
# Purpose: Parse FASTA headers and extract gene names (GN=... entries)
############################################################
extract_gene_names <- function(fasta_headers) {
sapply(fasta_headers, function(x) {
if (is.na(x) || x == "") return(NA_character_) # Handle empty or NA lines
# Find all GN=... matches
genes <- unlist(regmatches(x, gregexpr("GN=([^ ]+)", x)))
if (length(genes) == 0) return(NA_character_) # No GN found
# Remove "GN=" prefix
genes <- gsub("GN=", "", genes)
# Remove duplicates and collapse multiple entries
paste(unique(genes), collapse = ";")
})
}
C vs E
############################################################
# C vs E
############################################################
# Load MaxQuant output
df <- read.csv("./results_maxquant/hl428_all_galaxy.txt", sep = "\t")
df.prot <- df
# Remove reverse/contaminant/site-only identifications
df.prot <- df.prot[!df.prot$Reverse == "+", ]
df.prot <- df.prot[!df.prot$Potential.contaminant == "+", ]
df.prot <- df.prot[!df.prot$Only.identified.by.site == "+", ]
# Extract gene names via helper
df.prot$Gene.names <- extract_gene_names(df.prot$Fasta.headers)
# Extract LFQ intensity columns (as provided)
df.LFQ <- df.prot[, c(500:523, 524:547)]
df.LFQ[df.LFQ == 0] <- NA
print(colnames(df.LFQ))
## [1] "LFQ.intensity.HL_428_C_1A_15_1.raw.thermo.raw"
## [2] "LFQ.intensity.HL_428_C_1A_15_2.raw.thermo.raw"
## [3] "LFQ.intensity.HL_428_C_1A_15_3.raw.thermo.raw"
## [4] "LFQ.intensity.HL_428_C_1B_15_1.raw.thermo.raw"
## [5] "LFQ.intensity.HL_428_C_1B_15_2.raw.thermo.raw"
## [6] "LFQ.intensity.HL_428_C_1B_15_3.raw.thermo.raw"
## [7] "LFQ.intensity.HL_428_C_1C_15_1.raw.thermo.raw"
## [8] "LFQ.intensity.HL_428_C_1C_15_2.raw.thermo.raw"
## [9] "LFQ.intensity.HL_428_C_1C_15_3.raw.thermo.raw"
## [10] "LFQ.intensity.HL_428_C1_A_01.raw.thermo.raw"
## [11] "LFQ.intensity.HL_428_C1_A_02.raw.thermo.raw"
## [12] "LFQ.intensity.HL_428_C1_A_03.raw.thermo.raw"
## [13] "LFQ.intensity.HL_428_C1_B_01.raw.thermo.raw"
## [14] "LFQ.intensity.HL_428_C1_B_02.raw.thermo.raw"
## [15] "LFQ.intensity.HL_428_C1_B_03.raw.thermo.raw"
## [16] "LFQ.intensity.HL_428_C2_A_01.raw.thermo.raw"
## [17] "LFQ.intensity.HL_428_C2_A_02.raw.thermo.raw"
## [18] "LFQ.intensity.HL_428_C2_A_03.raw.thermo.raw"
## [19] "LFQ.intensity.HL_428_C2_B_01.raw.thermo.raw"
## [20] "LFQ.intensity.HL_428_C2_B_02.raw.thermo.raw"
## [21] "LFQ.intensity.HL_428_C2_B_03.raw.thermo.raw"
## [22] "LFQ.intensity.HL_428_C2_C_01.raw.thermo.raw"
## [23] "LFQ.intensity.HL_428_C2_C_02.raw.thermo.raw"
## [24] "LFQ.intensity.HL_428_C2_C_03.raw.thermo.raw"
## [25] "LFQ.intensity.HL_428_E_1A_15_1.raw.thermo.raw"
## [26] "LFQ.intensity.HL_428_E_1A_15_2.raw.thermo.raw"
## [27] "LFQ.intensity.HL_428_E_1A_15_3.raw.thermo.raw"
## [28] "LFQ.intensity.HL_428_E_1B_15_1.raw.thermo.raw"
## [29] "LFQ.intensity.HL_428_E_1B_15_2.raw.thermo.raw"
## [30] "LFQ.intensity.HL_428_E_1B_15_3.raw.thermo.raw"
## [31] "LFQ.intensity.HL_428_E_1C_15_1.raw.thermo.raw"
## [32] "LFQ.intensity.HL_428_E_1C_15_2.raw.thermo.raw"
## [33] "LFQ.intensity.HL_428_E_1C_15_3.raw.thermo.raw"
## [34] "LFQ.intensity.HL_428_E1_A_01.raw.thermo.raw"
## [35] "LFQ.intensity.HL_428_E1_A_02.raw.thermo.raw"
## [36] "LFQ.intensity.HL_428_E1_A_03.raw.thermo.raw"
## [37] "LFQ.intensity.HL_428_E1_B_01.raw.thermo.raw"
## [38] "LFQ.intensity.HL_428_E1_B_02.raw.thermo.raw"
## [39] "LFQ.intensity.HL_428_E1_B_03.raw.thermo.raw"
## [40] "LFQ.intensity.HL_428_E2_A_01.raw.thermo.raw"
## [41] "LFQ.intensity.HL_428_E2_A_02.raw.thermo.raw"
## [42] "LFQ.intensity.HL_428_E2_A_03.raw.thermo.raw"
## [43] "LFQ.intensity.HL_428_E2_B_01.raw.thermo.raw"
## [44] "LFQ.intensity.HL_428_E2_B_02.raw.thermo.raw"
## [45] "LFQ.intensity.HL_428_E2_B_03.raw.thermo.raw"
## [46] "LFQ.intensity.HL_428_E2_C_01.raw.thermo.raw"
## [47] "LFQ.intensity.HL_428_E2_C_02.raw.thermo.raw"
## [48] "LFQ.intensity.HL_428_E2_C_03.raw.thermo.raw"
# Set protein IDs as rownames
rownames(df.LFQ) <- df.prot$Majority.protein.IDs
# Transpose: rows = samples, cols = proteins
df.LFQ <- t(df.LFQ)
# Track columns with all NA (to align peptide counts later)
keep_cols <- colSums(!is.na(df.LFQ)) > 0
removed_col_indices <- which(!keep_cols)
# Remove columns that are entirely NA
df.LFQ.clean <- df.LFQ[, colSums(!is.na(df.LFQ)) > 0]
# Log2 transform
df.LFQ.clean <- log2(df.LFQ.clean)
# Missing-value imputation (local least squares)
result <- llsImpute(as.matrix(df.LFQ.clean), k = 10, correlation = "pearson", allVariables = TRUE)
df.LFQ <- as.data.frame(completeObs(result))
df.LFQ <- as.data.frame(t(df.LFQ))
# Validity counts per group (number of non-NA per protein)
df.LFQ$valid_A <- apply(df.LFQ[, 1:24], 1, function(x) sum(!is.na(x)))
df.LFQ$valid_B <- apply(df.LFQ[, 25:48], 1, function(x) sum(!is.na(x)))
# Filter proteins by minimal presence criteria
df.LFQ.filter <- df.LFQ[df.LFQ$valid_A >= 1 | df.LFQ$valid_B >= 2, 1:48]
# Inspect peptide count columns for the corresponding samples
print(colnames(df.prot[, c(107:130, 131:154)]))
## [1] "Razor...unique.peptides.HL_428_C_1A_15_1.raw.thermo.raw"
## [2] "Razor...unique.peptides.HL_428_C_1A_15_2.raw.thermo.raw"
## [3] "Razor...unique.peptides.HL_428_C_1A_15_3.raw.thermo.raw"
## [4] "Razor...unique.peptides.HL_428_C_1B_15_1.raw.thermo.raw"
## [5] "Razor...unique.peptides.HL_428_C_1B_15_2.raw.thermo.raw"
## [6] "Razor...unique.peptides.HL_428_C_1B_15_3.raw.thermo.raw"
## [7] "Razor...unique.peptides.HL_428_C_1C_15_1.raw.thermo.raw"
## [8] "Razor...unique.peptides.HL_428_C_1C_15_2.raw.thermo.raw"
## [9] "Razor...unique.peptides.HL_428_C_1C_15_3.raw.thermo.raw"
## [10] "Razor...unique.peptides.HL_428_C1_A_01.raw.thermo.raw"
## [11] "Razor...unique.peptides.HL_428_C1_A_02.raw.thermo.raw"
## [12] "Razor...unique.peptides.HL_428_C1_A_03.raw.thermo.raw"
## [13] "Razor...unique.peptides.HL_428_C1_B_01.raw.thermo.raw"
## [14] "Razor...unique.peptides.HL_428_C1_B_02.raw.thermo.raw"
## [15] "Razor...unique.peptides.HL_428_C1_B_03.raw.thermo.raw"
## [16] "Razor...unique.peptides.HL_428_C2_A_01.raw.thermo.raw"
## [17] "Razor...unique.peptides.HL_428_C2_A_02.raw.thermo.raw"
## [18] "Razor...unique.peptides.HL_428_C2_A_03.raw.thermo.raw"
## [19] "Razor...unique.peptides.HL_428_C2_B_01.raw.thermo.raw"
## [20] "Razor...unique.peptides.HL_428_C2_B_02.raw.thermo.raw"
## [21] "Razor...unique.peptides.HL_428_C2_B_03.raw.thermo.raw"
## [22] "Razor...unique.peptides.HL_428_C2_C_01.raw.thermo.raw"
## [23] "Razor...unique.peptides.HL_428_C2_C_02.raw.thermo.raw"
## [24] "Razor...unique.peptides.HL_428_C2_C_03.raw.thermo.raw"
## [25] "Razor...unique.peptides.HL_428_E_1A_15_1.raw.thermo.raw"
## [26] "Razor...unique.peptides.HL_428_E_1A_15_2.raw.thermo.raw"
## [27] "Razor...unique.peptides.HL_428_E_1A_15_3.raw.thermo.raw"
## [28] "Razor...unique.peptides.HL_428_E_1B_15_1.raw.thermo.raw"
## [29] "Razor...unique.peptides.HL_428_E_1B_15_2.raw.thermo.raw"
## [30] "Razor...unique.peptides.HL_428_E_1B_15_3.raw.thermo.raw"
## [31] "Razor...unique.peptides.HL_428_E_1C_15_1.raw.thermo.raw"
## [32] "Razor...unique.peptides.HL_428_E_1C_15_2.raw.thermo.raw"
## [33] "Razor...unique.peptides.HL_428_E_1C_15_3.raw.thermo.raw"
## [34] "Razor...unique.peptides.HL_428_E1_A_01.raw.thermo.raw"
## [35] "Razor...unique.peptides.HL_428_E1_A_02.raw.thermo.raw"
## [36] "Razor...unique.peptides.HL_428_E1_A_03.raw.thermo.raw"
## [37] "Razor...unique.peptides.HL_428_E1_B_01.raw.thermo.raw"
## [38] "Razor...unique.peptides.HL_428_E1_B_02.raw.thermo.raw"
## [39] "Razor...unique.peptides.HL_428_E1_B_03.raw.thermo.raw"
## [40] "Razor...unique.peptides.HL_428_E2_A_01.raw.thermo.raw"
## [41] "Razor...unique.peptides.HL_428_E2_A_02.raw.thermo.raw"
## [42] "Razor...unique.peptides.HL_428_E2_A_03.raw.thermo.raw"
## [43] "Razor...unique.peptides.HL_428_E2_B_01.raw.thermo.raw"
## [44] "Razor...unique.peptides.HL_428_E2_B_02.raw.thermo.raw"
## [45] "Razor...unique.peptides.HL_428_E2_B_03.raw.thermo.raw"
## [46] "Razor...unique.peptides.HL_428_E2_C_01.raw.thermo.raw"
## [47] "Razor...unique.peptides.HL_428_E2_C_02.raw.thermo.raw"
## [48] "Razor...unique.peptides.HL_428_E2_C_03.raw.thermo.raw"
# Build peptide-count table (unique+razor) for DEqMS; min across samples
pep.count.table <- data.frame(
count = rowMins(as.matrix(df.prot[-removed_col_indices, c(107:130, 131:154)])),
row.names = df.prot$Majority.protein.IDs[-removed_col_indices]
)
# Add pseudocount to avoid zeros
pep.count.table$count <- pep.count.table$count + 1
# Construct protein matrix for limma/DEqMS
protein.matrix <- as.matrix(df.LFQ.filter)
# Factors for class and replicate (batch)
class <- factor(c(rep("C", 24), rep("E", 24)))
replicate <- factor(c(rep("rep1", 12), rep("rep2", 12),
rep("rep1", 12), rep("rep2", 12)))
# Design without intercept; replicate as covariate
design <- model.matrix(~ 0 + class + replicate)
colnames(design)[1:2] <- c("classC", "classE")
# Run DEqMS pipeline with specified contrast
fit_result <- run_deqms_pipeline(
protein.matrix, design, pep.count.table,
contrast_string = "classC-classE"
)
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : pseudoinverse used at -0.029654
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : neighborhood radius 1.6146
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : There are other near singularities as well. 2.5121

# Collect DEqMS results for the first coefficient
DEqMS.results <- outputResult(fit_result, coef_col = 1)
# Add gene names to results
rownames(df.prot) <- df.prot$Majority.protein.IDs
DEqMS.results$Gene.name <- df.prot[DEqMS.results$gene, ]$Gene.names
# Export results with timestamp
timestamp <- format(Sys.time(), "%Y%m%d_%H%M%S")
write.csv(DEqMS.results,
file = paste0("/proj/proteomics/7_combinedHL/evaluation_paper/abc_2025/raw_log2FCdata/hl428_all/data_",
timestamp, ".csv"))
# Process results: remove NA logFC and round numeric columns
processed_df <- DEqMS.results %>%
dplyr::filter(!is.na(logFC)) %>%
dplyr::mutate(across(where(is.numeric), ~ round(.x, 3)))
# Display table
datatable(processed_df, rownames = FALSE)
# Proteins of interest (assumes 'proteins_of_interest' is defined upstream)
processed_df_interest <- processed_df %>%
dplyr::filter(Gene.name %in% proteins_of_interest)
datatable(processed_df_interest, rownames = FALSE, caption = "proteins of interest")
# Volcano plot
vp_lfc_limit <- 5
plot_volcano(DEqMS.results, "C vs E", vp_lfc_limit)

# Counts for strongly regulated proteins
sig <- DEqMS.results %>% dplyr::filter(sca.P.Value <= 0.05)
n_up <- sum(sig$logFC >= 1.0, na.rm = TRUE)
n_down <- sum(sig$logFC <= -1.0, na.rm = TRUE)
n_total <- sum(abs(sig$logFC) >= 1.0, na.rm = TRUE)
cat("Strongly upregulated (logFC ≥ 1.0):", n_up, "\n")
## Strongly upregulated (logFC ≥ 1.0): 38
cat("Strongly downregulated (logFC ≤ -1.0):", n_down, "\n")
## Strongly downregulated (logFC ≤ -1.0): 46
cat("Total |logFC| ≥ 1.0:", n_total, "\n")
## Total |logFC| ≥ 1.0: 84
C vs RE
############################################################
# C vs RE
############################################################
df <- read.csv("./results_maxquant/hl428_all_galaxy.txt", sep = "\t")
df.prot <- df
df.prot <- df.prot[!df.prot$Reverse == "+", ]
df.prot <- df.prot[!df.prot$Potential.contaminant == "+", ]
df.prot <- df.prot[!df.prot$Only.identified.by.site == "+", ]
df.prot$Gene.names <- extract_gene_names(df.prot$Fasta.headers)
df.LFQ <- df.prot[, c(500:523, 572:586, 588:595)]
df.LFQ[df.LFQ == 0] <- NA
print(colnames(df.LFQ))
## [1] "LFQ.intensity.HL_428_C_1A_15_1.raw.thermo.raw"
## [2] "LFQ.intensity.HL_428_C_1A_15_2.raw.thermo.raw"
## [3] "LFQ.intensity.HL_428_C_1A_15_3.raw.thermo.raw"
## [4] "LFQ.intensity.HL_428_C_1B_15_1.raw.thermo.raw"
## [5] "LFQ.intensity.HL_428_C_1B_15_2.raw.thermo.raw"
## [6] "LFQ.intensity.HL_428_C_1B_15_3.raw.thermo.raw"
## [7] "LFQ.intensity.HL_428_C_1C_15_1.raw.thermo.raw"
## [8] "LFQ.intensity.HL_428_C_1C_15_2.raw.thermo.raw"
## [9] "LFQ.intensity.HL_428_C_1C_15_3.raw.thermo.raw"
## [10] "LFQ.intensity.HL_428_C1_A_01.raw.thermo.raw"
## [11] "LFQ.intensity.HL_428_C1_A_02.raw.thermo.raw"
## [12] "LFQ.intensity.HL_428_C1_A_03.raw.thermo.raw"
## [13] "LFQ.intensity.HL_428_C1_B_01.raw.thermo.raw"
## [14] "LFQ.intensity.HL_428_C1_B_02.raw.thermo.raw"
## [15] "LFQ.intensity.HL_428_C1_B_03.raw.thermo.raw"
## [16] "LFQ.intensity.HL_428_C2_A_01.raw.thermo.raw"
## [17] "LFQ.intensity.HL_428_C2_A_02.raw.thermo.raw"
## [18] "LFQ.intensity.HL_428_C2_A_03.raw.thermo.raw"
## [19] "LFQ.intensity.HL_428_C2_B_01.raw.thermo.raw"
## [20] "LFQ.intensity.HL_428_C2_B_02.raw.thermo.raw"
## [21] "LFQ.intensity.HL_428_C2_B_03.raw.thermo.raw"
## [22] "LFQ.intensity.HL_428_C2_C_01.raw.thermo.raw"
## [23] "LFQ.intensity.HL_428_C2_C_02.raw.thermo.raw"
## [24] "LFQ.intensity.HL_428_C2_C_03.raw.thermo.raw"
## [25] "LFQ.intensity.HL_428_RE_1A_15_1.raw.thermo.raw"
## [26] "LFQ.intensity.HL_428_RE_1A_15_2.raw.thermo.raw"
## [27] "LFQ.intensity.HL_428_RE_1A_15_3.raw.thermo.raw"
## [28] "LFQ.intensity.HL_428_RE_1B_15_1.raw.thermo.raw"
## [29] "LFQ.intensity.HL_428_RE_1B_15_2.raw.thermo.raw"
## [30] "LFQ.intensity.HL_428_RE_1B_15_3.raw.thermo.raw"
## [31] "LFQ.intensity.HL_428_RE_1C_15_1.raw.thermo.raw"
## [32] "LFQ.intensity.HL_428_RE_1C_15_2.raw.thermo.raw"
## [33] "LFQ.intensity.HL_428_RE_1C_15_3.raw.thermo.raw"
## [34] "LFQ.intensity.HL_428_RE1_A_01.raw.thermo.raw"
## [35] "LFQ.intensity.HL_428_RE1_A_02.raw.thermo.raw"
## [36] "LFQ.intensity.HL_428_RE1_A_03.raw.thermo.raw"
## [37] "LFQ.intensity.HL_428_RE1_B_01.raw.thermo.raw"
## [38] "LFQ.intensity.HL_428_RE1_B_02.raw.thermo.raw"
## [39] "LFQ.intensity.HL_428_RE1_B_03.raw.thermo.raw"
## [40] "LFQ.intensity.HL_428_RE2_A_02.raw.thermo.raw"
## [41] "LFQ.intensity.HL_428_RE2_A_03.raw.thermo.raw"
## [42] "LFQ.intensity.HL_428_RE2_B_01.raw.thermo.raw"
## [43] "LFQ.intensity.HL_428_RE2_B_02.raw.thermo.raw"
## [44] "LFQ.intensity.HL_428_RE2_B_03.raw.thermo.raw"
## [45] "LFQ.intensity.HL_428_RE2_C_01.raw.thermo.raw"
## [46] "LFQ.intensity.HL_428_RE2_C_02.raw.thermo.raw"
## [47] "LFQ.intensity.HL_428_RE2_C_03.raw.thermo.raw"
rownames(df.LFQ) <- df.prot$Majority.protein.IDs
df.LFQ <- t(df.LFQ)
keep_cols <- colSums(!is.na(df.LFQ)) > 0
removed_col_indices <- which(!keep_cols)
df.LFQ.clean <- df.LFQ[, colSums(!is.na(df.LFQ)) > 0]
df.LFQ.clean <- log2(df.LFQ.clean)
result <- llsImpute(as.matrix(df.LFQ.clean), k = 10, correlation = "pearson", allVariables = TRUE)
df.LFQ <- as.data.frame(completeObs(result))
df.LFQ <- as.data.frame(t(df.LFQ))
df.LFQ$valid_A <- apply(df.LFQ[, 1:24], 1, function(x) sum(!is.na(x)))
df.LFQ$valid_B <- apply(df.LFQ[, 25:47], 1, function(x) sum(!is.na(x)))
df.LFQ.filter <- df.LFQ[df.LFQ$valid_A >= 1 | df.LFQ$valid_B >= 2, 1:47]
print(colnames(df.prot[, c(107:130, 179:193, 195:202)]))
## [1] "Razor...unique.peptides.HL_428_C_1A_15_1.raw.thermo.raw"
## [2] "Razor...unique.peptides.HL_428_C_1A_15_2.raw.thermo.raw"
## [3] "Razor...unique.peptides.HL_428_C_1A_15_3.raw.thermo.raw"
## [4] "Razor...unique.peptides.HL_428_C_1B_15_1.raw.thermo.raw"
## [5] "Razor...unique.peptides.HL_428_C_1B_15_2.raw.thermo.raw"
## [6] "Razor...unique.peptides.HL_428_C_1B_15_3.raw.thermo.raw"
## [7] "Razor...unique.peptides.HL_428_C_1C_15_1.raw.thermo.raw"
## [8] "Razor...unique.peptides.HL_428_C_1C_15_2.raw.thermo.raw"
## [9] "Razor...unique.peptides.HL_428_C_1C_15_3.raw.thermo.raw"
## [10] "Razor...unique.peptides.HL_428_C1_A_01.raw.thermo.raw"
## [11] "Razor...unique.peptides.HL_428_C1_A_02.raw.thermo.raw"
## [12] "Razor...unique.peptides.HL_428_C1_A_03.raw.thermo.raw"
## [13] "Razor...unique.peptides.HL_428_C1_B_01.raw.thermo.raw"
## [14] "Razor...unique.peptides.HL_428_C1_B_02.raw.thermo.raw"
## [15] "Razor...unique.peptides.HL_428_C1_B_03.raw.thermo.raw"
## [16] "Razor...unique.peptides.HL_428_C2_A_01.raw.thermo.raw"
## [17] "Razor...unique.peptides.HL_428_C2_A_02.raw.thermo.raw"
## [18] "Razor...unique.peptides.HL_428_C2_A_03.raw.thermo.raw"
## [19] "Razor...unique.peptides.HL_428_C2_B_01.raw.thermo.raw"
## [20] "Razor...unique.peptides.HL_428_C2_B_02.raw.thermo.raw"
## [21] "Razor...unique.peptides.HL_428_C2_B_03.raw.thermo.raw"
## [22] "Razor...unique.peptides.HL_428_C2_C_01.raw.thermo.raw"
## [23] "Razor...unique.peptides.HL_428_C2_C_02.raw.thermo.raw"
## [24] "Razor...unique.peptides.HL_428_C2_C_03.raw.thermo.raw"
## [25] "Razor...unique.peptides.HL_428_RE_1A_15_1.raw.thermo.raw"
## [26] "Razor...unique.peptides.HL_428_RE_1A_15_2.raw.thermo.raw"
## [27] "Razor...unique.peptides.HL_428_RE_1A_15_3.raw.thermo.raw"
## [28] "Razor...unique.peptides.HL_428_RE_1B_15_1.raw.thermo.raw"
## [29] "Razor...unique.peptides.HL_428_RE_1B_15_2.raw.thermo.raw"
## [30] "Razor...unique.peptides.HL_428_RE_1B_15_3.raw.thermo.raw"
## [31] "Razor...unique.peptides.HL_428_RE_1C_15_1.raw.thermo.raw"
## [32] "Razor...unique.peptides.HL_428_RE_1C_15_2.raw.thermo.raw"
## [33] "Razor...unique.peptides.HL_428_RE_1C_15_3.raw.thermo.raw"
## [34] "Razor...unique.peptides.HL_428_RE1_A_01.raw.thermo.raw"
## [35] "Razor...unique.peptides.HL_428_RE1_A_02.raw.thermo.raw"
## [36] "Razor...unique.peptides.HL_428_RE1_A_03.raw.thermo.raw"
## [37] "Razor...unique.peptides.HL_428_RE1_B_01.raw.thermo.raw"
## [38] "Razor...unique.peptides.HL_428_RE1_B_02.raw.thermo.raw"
## [39] "Razor...unique.peptides.HL_428_RE1_B_03.raw.thermo.raw"
## [40] "Razor...unique.peptides.HL_428_RE2_A_02.raw.thermo.raw"
## [41] "Razor...unique.peptides.HL_428_RE2_A_03.raw.thermo.raw"
## [42] "Razor...unique.peptides.HL_428_RE2_B_01.raw.thermo.raw"
## [43] "Razor...unique.peptides.HL_428_RE2_B_02.raw.thermo.raw"
## [44] "Razor...unique.peptides.HL_428_RE2_B_03.raw.thermo.raw"
## [45] "Razor...unique.peptides.HL_428_RE2_C_01.raw.thermo.raw"
## [46] "Razor...unique.peptides.HL_428_RE2_C_02.raw.thermo.raw"
## [47] "Razor...unique.peptides.HL_428_RE2_C_03.raw.thermo.raw"
pep.count.table <- data.frame(
count = rowMins(as.matrix(df.prot[-removed_col_indices, c(107:130, 179:193, 195:202)])),
row.names = df.prot$Majority.protein.IDs[-removed_col_indices]
)
pep.count.table$count <- pep.count.table$count + 1
protein.matrix <- as.matrix(df.LFQ.filter)
class <- factor(c(rep("C", 24), rep("RE", 23)))
replicate <- factor(c(rep("rep1", 12), rep("rep2", 12),
rep("rep1", 12), rep("rep2", 11)))
design <- model.matrix(~ 0 + class + replicate)
colnames(design)[1:2] <- c("classC", "classRE")
fit_result <- run_deqms_pipeline(
protein.matrix, design, pep.count.table,
contrast_string = "classC-classRE"
)
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : pseudoinverse used at -0.029164
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : neighborhood radius 1.6141
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : reciprocal condition number 2.4576e-16
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : There are other near singularities as well. 2.5121

DEqMS.results <- outputResult(fit_result, coef_col = 1)
rownames(df.prot) <- df.prot$Majority.protein.IDs
DEqMS.results$Gene.name <- df.prot[DEqMS.results$gene, ]$Gene.names
timestamp <- format(Sys.time(), "%Y%m%d_%H%M%S")
write.csv(DEqMS.results,
file = paste0("/proj/proteomics/7_combinedHL/evaluation_paper/abc_2025/raw_log2FCdata/hl428_all/data_", timestamp, ".csv"))
processed_df <- DEqMS.results %>%
dplyr::filter(!is.na(logFC)) %>%
dplyr::mutate(across(where(is.numeric), ~ round(.x, 3)))
datatable(processed_df, rownames = FALSE)
processed_df_interest <- processed_df %>%
dplyr::filter(Gene.name %in% proteins_of_interest)
datatable(processed_df_interest, rownames = FALSE, caption = "proteins of interest")
vp_lfc_limit <- 5
plot_volcano(DEqMS.results, "C vs RE", vp_lfc_limit)

sig <- DEqMS.results %>% dplyr::filter(sca.P.Value <= 0.05)
n_up <- sum(sig$logFC >= 1.0, na.rm = TRUE)
n_down <- sum(sig$logFC <= -1.0, na.rm = TRUE)
n_total <- sum(abs(sig$logFC) >= 1.0, na.rm = TRUE)
cat("Strongly upregulated (logFC ≥ 1.0):", n_up, "\n")
## Strongly upregulated (logFC ≥ 1.0): 194
cat("Strongly downregulated (logFC ≤ -1.0):", n_down, "\n")
## Strongly downregulated (logFC ≤ -1.0): 96
cat("Total |logFC| ≥ 1.0:", n_total, "\n")
## Total |logFC| ≥ 1.0: 290
C vs R
############################################################
# C vs R
############################################################
df <- read.csv("./results_maxquant/hl428_all_galaxy.txt", sep = "\t")
df.prot <- df
df.prot <- df.prot[!df.prot$Reverse == "+", ]
df.prot <- df.prot[!df.prot$Potential.contaminant == "+", ]
df.prot <- df.prot[!df.prot$Only.identified.by.site == "+", ]
df.prot$Gene.names <- extract_gene_names(df.prot$Fasta.headers)
df.LFQ <- df.prot[, c(500:523, 548:571)]
df.LFQ[df.LFQ == 0] <- NA
print(colnames(df.LFQ))
## [1] "LFQ.intensity.HL_428_C_1A_15_1.raw.thermo.raw"
## [2] "LFQ.intensity.HL_428_C_1A_15_2.raw.thermo.raw"
## [3] "LFQ.intensity.HL_428_C_1A_15_3.raw.thermo.raw"
## [4] "LFQ.intensity.HL_428_C_1B_15_1.raw.thermo.raw"
## [5] "LFQ.intensity.HL_428_C_1B_15_2.raw.thermo.raw"
## [6] "LFQ.intensity.HL_428_C_1B_15_3.raw.thermo.raw"
## [7] "LFQ.intensity.HL_428_C_1C_15_1.raw.thermo.raw"
## [8] "LFQ.intensity.HL_428_C_1C_15_2.raw.thermo.raw"
## [9] "LFQ.intensity.HL_428_C_1C_15_3.raw.thermo.raw"
## [10] "LFQ.intensity.HL_428_C1_A_01.raw.thermo.raw"
## [11] "LFQ.intensity.HL_428_C1_A_02.raw.thermo.raw"
## [12] "LFQ.intensity.HL_428_C1_A_03.raw.thermo.raw"
## [13] "LFQ.intensity.HL_428_C1_B_01.raw.thermo.raw"
## [14] "LFQ.intensity.HL_428_C1_B_02.raw.thermo.raw"
## [15] "LFQ.intensity.HL_428_C1_B_03.raw.thermo.raw"
## [16] "LFQ.intensity.HL_428_C2_A_01.raw.thermo.raw"
## [17] "LFQ.intensity.HL_428_C2_A_02.raw.thermo.raw"
## [18] "LFQ.intensity.HL_428_C2_A_03.raw.thermo.raw"
## [19] "LFQ.intensity.HL_428_C2_B_01.raw.thermo.raw"
## [20] "LFQ.intensity.HL_428_C2_B_02.raw.thermo.raw"
## [21] "LFQ.intensity.HL_428_C2_B_03.raw.thermo.raw"
## [22] "LFQ.intensity.HL_428_C2_C_01.raw.thermo.raw"
## [23] "LFQ.intensity.HL_428_C2_C_02.raw.thermo.raw"
## [24] "LFQ.intensity.HL_428_C2_C_03.raw.thermo.raw"
## [25] "LFQ.intensity.HL_428_R_1A_15_1.raw.thermo.raw"
## [26] "LFQ.intensity.HL_428_R_1A_15_2.raw.thermo.raw"
## [27] "LFQ.intensity.HL_428_R_1A_15_3.raw.thermo.raw"
## [28] "LFQ.intensity.HL_428_R_1B_15_1.raw.thermo.raw"
## [29] "LFQ.intensity.HL_428_R_1B_15_2.raw.thermo.raw"
## [30] "LFQ.intensity.HL_428_R_1B_15_3.raw.thermo.raw"
## [31] "LFQ.intensity.HL_428_R_1C_15_1.raw.thermo.raw"
## [32] "LFQ.intensity.HL_428_R_1C_15_2.raw.thermo.raw"
## [33] "LFQ.intensity.HL_428_R_1C_15_3.raw.thermo.raw"
## [34] "LFQ.intensity.HL_428_R1_A_01.raw.thermo.raw"
## [35] "LFQ.intensity.HL_428_R1_A_02.raw.thermo.raw"
## [36] "LFQ.intensity.HL_428_R1_A_03.raw.thermo.raw"
## [37] "LFQ.intensity.HL_428_R1_B_01.raw.thermo.raw"
## [38] "LFQ.intensity.HL_428_R1_B_02.raw.thermo.raw"
## [39] "LFQ.intensity.HL_428_R1_B_03.raw.thermo.raw"
## [40] "LFQ.intensity.HL_428_R2_A_01.raw.thermo.raw"
## [41] "LFQ.intensity.HL_428_R2_A_02.raw.thermo.raw"
## [42] "LFQ.intensity.HL_428_R2_A_03.raw.thermo.raw"
## [43] "LFQ.intensity.HL_428_R2_B_01.raw.thermo.raw"
## [44] "LFQ.intensity.HL_428_R2_B_02.raw.thermo.raw"
## [45] "LFQ.intensity.HL_428_R2_B_03.raw.thermo.raw"
## [46] "LFQ.intensity.HL_428_R2_C_01.raw.thermo.raw"
## [47] "LFQ.intensity.HL_428_R2_C_02.raw.thermo.raw"
## [48] "LFQ.intensity.HL_428_R2_C_03.raw.thermo.raw"
rownames(df.LFQ) <- df.prot$Majority.protein.IDs
df.LFQ <- t(df.LFQ)
keep_cols <- colSums(!is.na(df.LFQ)) > 0
removed_col_indices <- which(!keep_cols)
df.LFQ.clean <- df.LFQ[, colSums(!is.na(df.LFQ)) > 0]
df.LFQ.clean <- log2(df.LFQ.clean)
result <- llsImpute(as.matrix(df.LFQ.clean), k = 10, correlation = "pearson", allVariables = TRUE)
df.LFQ <- as.data.frame(completeObs(result))
df.LFQ <- as.data.frame(t(df.LFQ))
df.LFQ$valid_A <- apply(df.LFQ[, 1:24], 1, function(x) sum(!is.na(x)))
df.LFQ$valid_B <- apply(df.LFQ[, 25:48], 1, function(x) sum(!is.na(x)))
df.LFQ.filter <- df.LFQ[df.LFQ$valid_A >= 1 | df.LFQ$valid_B >= 2, 1:48]
print(colnames(df.prot[, c(107:130, 155:178)]))
## [1] "Razor...unique.peptides.HL_428_C_1A_15_1.raw.thermo.raw"
## [2] "Razor...unique.peptides.HL_428_C_1A_15_2.raw.thermo.raw"
## [3] "Razor...unique.peptides.HL_428_C_1A_15_3.raw.thermo.raw"
## [4] "Razor...unique.peptides.HL_428_C_1B_15_1.raw.thermo.raw"
## [5] "Razor...unique.peptides.HL_428_C_1B_15_2.raw.thermo.raw"
## [6] "Razor...unique.peptides.HL_428_C_1B_15_3.raw.thermo.raw"
## [7] "Razor...unique.peptides.HL_428_C_1C_15_1.raw.thermo.raw"
## [8] "Razor...unique.peptides.HL_428_C_1C_15_2.raw.thermo.raw"
## [9] "Razor...unique.peptides.HL_428_C_1C_15_3.raw.thermo.raw"
## [10] "Razor...unique.peptides.HL_428_C1_A_01.raw.thermo.raw"
## [11] "Razor...unique.peptides.HL_428_C1_A_02.raw.thermo.raw"
## [12] "Razor...unique.peptides.HL_428_C1_A_03.raw.thermo.raw"
## [13] "Razor...unique.peptides.HL_428_C1_B_01.raw.thermo.raw"
## [14] "Razor...unique.peptides.HL_428_C1_B_02.raw.thermo.raw"
## [15] "Razor...unique.peptides.HL_428_C1_B_03.raw.thermo.raw"
## [16] "Razor...unique.peptides.HL_428_C2_A_01.raw.thermo.raw"
## [17] "Razor...unique.peptides.HL_428_C2_A_02.raw.thermo.raw"
## [18] "Razor...unique.peptides.HL_428_C2_A_03.raw.thermo.raw"
## [19] "Razor...unique.peptides.HL_428_C2_B_01.raw.thermo.raw"
## [20] "Razor...unique.peptides.HL_428_C2_B_02.raw.thermo.raw"
## [21] "Razor...unique.peptides.HL_428_C2_B_03.raw.thermo.raw"
## [22] "Razor...unique.peptides.HL_428_C2_C_01.raw.thermo.raw"
## [23] "Razor...unique.peptides.HL_428_C2_C_02.raw.thermo.raw"
## [24] "Razor...unique.peptides.HL_428_C2_C_03.raw.thermo.raw"
## [25] "Razor...unique.peptides.HL_428_R_1A_15_1.raw.thermo.raw"
## [26] "Razor...unique.peptides.HL_428_R_1A_15_2.raw.thermo.raw"
## [27] "Razor...unique.peptides.HL_428_R_1A_15_3.raw.thermo.raw"
## [28] "Razor...unique.peptides.HL_428_R_1B_15_1.raw.thermo.raw"
## [29] "Razor...unique.peptides.HL_428_R_1B_15_2.raw.thermo.raw"
## [30] "Razor...unique.peptides.HL_428_R_1B_15_3.raw.thermo.raw"
## [31] "Razor...unique.peptides.HL_428_R_1C_15_1.raw.thermo.raw"
## [32] "Razor...unique.peptides.HL_428_R_1C_15_2.raw.thermo.raw"
## [33] "Razor...unique.peptides.HL_428_R_1C_15_3.raw.thermo.raw"
## [34] "Razor...unique.peptides.HL_428_R1_A_01.raw.thermo.raw"
## [35] "Razor...unique.peptides.HL_428_R1_A_02.raw.thermo.raw"
## [36] "Razor...unique.peptides.HL_428_R1_A_03.raw.thermo.raw"
## [37] "Razor...unique.peptides.HL_428_R1_B_01.raw.thermo.raw"
## [38] "Razor...unique.peptides.HL_428_R1_B_02.raw.thermo.raw"
## [39] "Razor...unique.peptides.HL_428_R1_B_03.raw.thermo.raw"
## [40] "Razor...unique.peptides.HL_428_R2_A_01.raw.thermo.raw"
## [41] "Razor...unique.peptides.HL_428_R2_A_02.raw.thermo.raw"
## [42] "Razor...unique.peptides.HL_428_R2_A_03.raw.thermo.raw"
## [43] "Razor...unique.peptides.HL_428_R2_B_01.raw.thermo.raw"
## [44] "Razor...unique.peptides.HL_428_R2_B_02.raw.thermo.raw"
## [45] "Razor...unique.peptides.HL_428_R2_B_03.raw.thermo.raw"
## [46] "Razor...unique.peptides.HL_428_R2_C_01.raw.thermo.raw"
## [47] "Razor...unique.peptides.HL_428_R2_C_02.raw.thermo.raw"
## [48] "Razor...unique.peptides.HL_428_R2_C_03.raw.thermo.raw"
pep.count.table <- data.frame(
count = rowMins(as.matrix(df.prot[-removed_col_indices, c(107:130, 155:178)])),
row.names = df.prot$Majority.protein.IDs[-removed_col_indices]
)
pep.count.table$count <- pep.count.table$count + 1
protein.matrix <- as.matrix(df.LFQ.filter)
class <- factor(c(rep("C", 24), rep("R", 24)))
replicate <- factor(c(rep("rep1", 12), rep("rep2", 12),
rep("rep1", 12), rep("rep2", 12)))
design <- model.matrix(~ 0 + class + replicate)
colnames(design)[1:2] <- c("classC", "classR")
fit_result <- run_deqms_pipeline(
protein.matrix, design, pep.count.table,
contrast_string = "classC-classR"
)
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : pseudoinverse used at -0.030112
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : neighborhood radius 1.6151
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : reciprocal condition number 1.7048e-16
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : There are other near singularities as well. 2.5121

DEqMS.results <- outputResult(fit_result, coef_col = 1)
rownames(df.prot) <- df.prot$Majority.protein.IDs
DEqMS.results$Gene.name <- df.prot[DEqMS.results$gene, ]$Gene.names
timestamp <- format(Sys.time(), "%Y%m%d_%H%M%S")
write.csv(DEqMS.results,
file = paste0("/proj/proteomics/7_combinedHL/evaluation_paper/abc_2025/raw_log2FCdata/hl428_all/data_", timestamp, ".csv"))
processed_df <- DEqMS.results %>%
dplyr::filter(!is.na(logFC)) %>%
dplyr::mutate(across(where(is.numeric), ~ round(.x, 3)))
datatable(processed_df, rownames = FALSE)
processed_df_interest <- processed_df %>%
dplyr::filter(Gene.name %in% proteins_of_interest)
datatable(processed_df_interest, rownames = FALSE, caption = "proteins of interest")
vp_lfc_limit <- 5
plot_volcano(DEqMS.results, "C vs R", vp_lfc_limit)

sig <- DEqMS.results %>% dplyr::filter(sca.P.Value <= 0.05)
n_up <- sum(sig$logFC >= 1.0, na.rm = TRUE)
n_down <- sum(sig$logFC <= -1.0, na.rm = TRUE)
n_total <- sum(abs(sig$logFC) >= 1.0, na.rm = TRUE)
cat("Strongly upregulated (logFC ≥ 1.0):", n_up, "\n")
## Strongly upregulated (logFC ≥ 1.0): 16
cat("Strongly downregulated (logFC ≤ -1.0):", n_down, "\n")
## Strongly downregulated (logFC ≤ -1.0): 28
cat("Total |logFC| ≥ 1.0:", n_total, "\n")
## Total |logFC| ≥ 1.0: 44
E vs R
############################################################
# E vs R
############################################################
df <- read.csv("./results_maxquant/hl428_all_galaxy.txt", sep = "\t")
df.prot <- df
df.prot <- df.prot[!df.prot$Reverse == "+", ]
df.prot <- df.prot[!df.prot$Potential.contaminant == "+", ]
df.prot <- df.prot[!df.prot$Only.identified.by.site == "+", ]
df.prot$Gene.names <- extract_gene_names(df.prot$Fasta.headers)
df.LFQ <- df.prot[, c(524:547, 548:571)]
df.LFQ[df.LFQ == 0] <- NA
print(colnames(df.LFQ))
## [1] "LFQ.intensity.HL_428_E_1A_15_1.raw.thermo.raw"
## [2] "LFQ.intensity.HL_428_E_1A_15_2.raw.thermo.raw"
## [3] "LFQ.intensity.HL_428_E_1A_15_3.raw.thermo.raw"
## [4] "LFQ.intensity.HL_428_E_1B_15_1.raw.thermo.raw"
## [5] "LFQ.intensity.HL_428_E_1B_15_2.raw.thermo.raw"
## [6] "LFQ.intensity.HL_428_E_1B_15_3.raw.thermo.raw"
## [7] "LFQ.intensity.HL_428_E_1C_15_1.raw.thermo.raw"
## [8] "LFQ.intensity.HL_428_E_1C_15_2.raw.thermo.raw"
## [9] "LFQ.intensity.HL_428_E_1C_15_3.raw.thermo.raw"
## [10] "LFQ.intensity.HL_428_E1_A_01.raw.thermo.raw"
## [11] "LFQ.intensity.HL_428_E1_A_02.raw.thermo.raw"
## [12] "LFQ.intensity.HL_428_E1_A_03.raw.thermo.raw"
## [13] "LFQ.intensity.HL_428_E1_B_01.raw.thermo.raw"
## [14] "LFQ.intensity.HL_428_E1_B_02.raw.thermo.raw"
## [15] "LFQ.intensity.HL_428_E1_B_03.raw.thermo.raw"
## [16] "LFQ.intensity.HL_428_E2_A_01.raw.thermo.raw"
## [17] "LFQ.intensity.HL_428_E2_A_02.raw.thermo.raw"
## [18] "LFQ.intensity.HL_428_E2_A_03.raw.thermo.raw"
## [19] "LFQ.intensity.HL_428_E2_B_01.raw.thermo.raw"
## [20] "LFQ.intensity.HL_428_E2_B_02.raw.thermo.raw"
## [21] "LFQ.intensity.HL_428_E2_B_03.raw.thermo.raw"
## [22] "LFQ.intensity.HL_428_E2_C_01.raw.thermo.raw"
## [23] "LFQ.intensity.HL_428_E2_C_02.raw.thermo.raw"
## [24] "LFQ.intensity.HL_428_E2_C_03.raw.thermo.raw"
## [25] "LFQ.intensity.HL_428_R_1A_15_1.raw.thermo.raw"
## [26] "LFQ.intensity.HL_428_R_1A_15_2.raw.thermo.raw"
## [27] "LFQ.intensity.HL_428_R_1A_15_3.raw.thermo.raw"
## [28] "LFQ.intensity.HL_428_R_1B_15_1.raw.thermo.raw"
## [29] "LFQ.intensity.HL_428_R_1B_15_2.raw.thermo.raw"
## [30] "LFQ.intensity.HL_428_R_1B_15_3.raw.thermo.raw"
## [31] "LFQ.intensity.HL_428_R_1C_15_1.raw.thermo.raw"
## [32] "LFQ.intensity.HL_428_R_1C_15_2.raw.thermo.raw"
## [33] "LFQ.intensity.HL_428_R_1C_15_3.raw.thermo.raw"
## [34] "LFQ.intensity.HL_428_R1_A_01.raw.thermo.raw"
## [35] "LFQ.intensity.HL_428_R1_A_02.raw.thermo.raw"
## [36] "LFQ.intensity.HL_428_R1_A_03.raw.thermo.raw"
## [37] "LFQ.intensity.HL_428_R1_B_01.raw.thermo.raw"
## [38] "LFQ.intensity.HL_428_R1_B_02.raw.thermo.raw"
## [39] "LFQ.intensity.HL_428_R1_B_03.raw.thermo.raw"
## [40] "LFQ.intensity.HL_428_R2_A_01.raw.thermo.raw"
## [41] "LFQ.intensity.HL_428_R2_A_02.raw.thermo.raw"
## [42] "LFQ.intensity.HL_428_R2_A_03.raw.thermo.raw"
## [43] "LFQ.intensity.HL_428_R2_B_01.raw.thermo.raw"
## [44] "LFQ.intensity.HL_428_R2_B_02.raw.thermo.raw"
## [45] "LFQ.intensity.HL_428_R2_B_03.raw.thermo.raw"
## [46] "LFQ.intensity.HL_428_R2_C_01.raw.thermo.raw"
## [47] "LFQ.intensity.HL_428_R2_C_02.raw.thermo.raw"
## [48] "LFQ.intensity.HL_428_R2_C_03.raw.thermo.raw"
rownames(df.LFQ) <- df.prot$Majority.protein.IDs
df.LFQ <- t(df.LFQ)
keep_cols <- colSums(!is.na(df.LFQ)) > 0
removed_col_indices <- which(!keep_cols)
df.LFQ.clean <- df.LFQ[, colSums(!is.na(df.LFQ)) > 0]
df.LFQ.clean <- log2(df.LFQ.clean)
result <- llsImpute(as.matrix(df.LFQ.clean), k = 10, correlation = "pearson", allVariables = TRUE)
df.LFQ <- as.data.frame(completeObs(result))
df.LFQ <- as.data.frame(t(df.LFQ))
df.LFQ$valid_A <- apply(df.LFQ[, 1:24], 1, function(x) sum(!is.na(x)))
df.LFQ$valid_B <- apply(df.LFQ[, 25:48], 1, function(x) sum(!is.na(x)))
df.LFQ.filter <- df.LFQ[df.LFQ$valid_A >= 2 | df.LFQ$valid_B >= 2, 1:48]
print(colnames(df.prot[, c(131:154, 155:178)]))
## [1] "Razor...unique.peptides.HL_428_E_1A_15_1.raw.thermo.raw"
## [2] "Razor...unique.peptides.HL_428_E_1A_15_2.raw.thermo.raw"
## [3] "Razor...unique.peptides.HL_428_E_1A_15_3.raw.thermo.raw"
## [4] "Razor...unique.peptides.HL_428_E_1B_15_1.raw.thermo.raw"
## [5] "Razor...unique.peptides.HL_428_E_1B_15_2.raw.thermo.raw"
## [6] "Razor...unique.peptides.HL_428_E_1B_15_3.raw.thermo.raw"
## [7] "Razor...unique.peptides.HL_428_E_1C_15_1.raw.thermo.raw"
## [8] "Razor...unique.peptides.HL_428_E_1C_15_2.raw.thermo.raw"
## [9] "Razor...unique.peptides.HL_428_E_1C_15_3.raw.thermo.raw"
## [10] "Razor...unique.peptides.HL_428_E1_A_01.raw.thermo.raw"
## [11] "Razor...unique.peptides.HL_428_E1_A_02.raw.thermo.raw"
## [12] "Razor...unique.peptides.HL_428_E1_A_03.raw.thermo.raw"
## [13] "Razor...unique.peptides.HL_428_E1_B_01.raw.thermo.raw"
## [14] "Razor...unique.peptides.HL_428_E1_B_02.raw.thermo.raw"
## [15] "Razor...unique.peptides.HL_428_E1_B_03.raw.thermo.raw"
## [16] "Razor...unique.peptides.HL_428_E2_A_01.raw.thermo.raw"
## [17] "Razor...unique.peptides.HL_428_E2_A_02.raw.thermo.raw"
## [18] "Razor...unique.peptides.HL_428_E2_A_03.raw.thermo.raw"
## [19] "Razor...unique.peptides.HL_428_E2_B_01.raw.thermo.raw"
## [20] "Razor...unique.peptides.HL_428_E2_B_02.raw.thermo.raw"
## [21] "Razor...unique.peptides.HL_428_E2_B_03.raw.thermo.raw"
## [22] "Razor...unique.peptides.HL_428_E2_C_01.raw.thermo.raw"
## [23] "Razor...unique.peptides.HL_428_E2_C_02.raw.thermo.raw"
## [24] "Razor...unique.peptides.HL_428_E2_C_03.raw.thermo.raw"
## [25] "Razor...unique.peptides.HL_428_R_1A_15_1.raw.thermo.raw"
## [26] "Razor...unique.peptides.HL_428_R_1A_15_2.raw.thermo.raw"
## [27] "Razor...unique.peptides.HL_428_R_1A_15_3.raw.thermo.raw"
## [28] "Razor...unique.peptides.HL_428_R_1B_15_1.raw.thermo.raw"
## [29] "Razor...unique.peptides.HL_428_R_1B_15_2.raw.thermo.raw"
## [30] "Razor...unique.peptides.HL_428_R_1B_15_3.raw.thermo.raw"
## [31] "Razor...unique.peptides.HL_428_R_1C_15_1.raw.thermo.raw"
## [32] "Razor...unique.peptides.HL_428_R_1C_15_2.raw.thermo.raw"
## [33] "Razor...unique.peptides.HL_428_R_1C_15_3.raw.thermo.raw"
## [34] "Razor...unique.peptides.HL_428_R1_A_01.raw.thermo.raw"
## [35] "Razor...unique.peptides.HL_428_R1_A_02.raw.thermo.raw"
## [36] "Razor...unique.peptides.HL_428_R1_A_03.raw.thermo.raw"
## [37] "Razor...unique.peptides.HL_428_R1_B_01.raw.thermo.raw"
## [38] "Razor...unique.peptides.HL_428_R1_B_02.raw.thermo.raw"
## [39] "Razor...unique.peptides.HL_428_R1_B_03.raw.thermo.raw"
## [40] "Razor...unique.peptides.HL_428_R2_A_01.raw.thermo.raw"
## [41] "Razor...unique.peptides.HL_428_R2_A_02.raw.thermo.raw"
## [42] "Razor...unique.peptides.HL_428_R2_A_03.raw.thermo.raw"
## [43] "Razor...unique.peptides.HL_428_R2_B_01.raw.thermo.raw"
## [44] "Razor...unique.peptides.HL_428_R2_B_02.raw.thermo.raw"
## [45] "Razor...unique.peptides.HL_428_R2_B_03.raw.thermo.raw"
## [46] "Razor...unique.peptides.HL_428_R2_C_01.raw.thermo.raw"
## [47] "Razor...unique.peptides.HL_428_R2_C_02.raw.thermo.raw"
## [48] "Razor...unique.peptides.HL_428_R2_C_03.raw.thermo.raw"
pep.count.table <- data.frame(
count = rowMins(as.matrix(df.prot[-removed_col_indices, c(131:154, 155:178)])),
row.names = df.prot$Majority.protein.IDs[-removed_col_indices]
)
pep.count.table$count <- pep.count.table$count + 1
protein.matrix <- as.matrix(df.LFQ.filter)
class <- factor(c(rep("E", 24), rep("R", 24)))
replicate <- factor(c(rep("rep1", 12), rep("rep2", 12),
rep("rep1", 12), rep("rep2", 12)))
design <- model.matrix(~ 0 + class + replicate)
colnames(design)[1:2] <- c("classE", "classR")
fit_result <- run_deqms_pipeline(
protein.matrix, design, pep.count.table,
contrast_string = "classE-classR"
)
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : pseudoinverse used at -0.029654
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : neighborhood radius 1.6146
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : There are other near singularities as well. 2.5121

DEqMS.results <- outputResult(fit_result, coef_col = 1)
rownames(df.prot) <- df.prot$Majority.protein.IDs
DEqMS.results$Gene.name <- df.prot[DEqMS.results$gene, ]$Gene.names
timestamp <- format(Sys.time(), "%Y%m%d_%H%M%S")
write.csv(DEqMS.results,
file = paste0("/proj/proteomics/7_combinedHL/evaluation_paper/abc_2025/raw_log2FCdata/hl428_all/data_", timestamp, ".csv"))
processed_df <- DEqMS.results %>%
dplyr::filter(!is.na(logFC)) %>%
dplyr::mutate(across(where(is.numeric), ~ round(.x, 3)))
datatable(processed_df, rownames = FALSE)
processed_df_interest <- processed_df %>%
dplyr::filter(Gene.name %in% proteins_of_interest)
datatable(processed_df_interest, rownames = FALSE, caption = "proteins of interest")
vp_lfc_limit <- 5
plot_volcano(DEqMS.results, "E vs R", vp_lfc_limit)

sig <- DEqMS.results %>% dplyr::filter(sca.P.Value <= 0.05)
n_up <- sum(sig$logFC >= 1.0, na.rm = TRUE)
n_down <- sum(sig$logFC <= -1.0, na.rm = TRUE)
n_total <- sum(abs(sig$logFC) >= 1.0, na.rm = TRUE)
cat("Strongly upregulated (logFC ≥ 1.0):", n_up, "\n")
## Strongly upregulated (logFC ≥ 1.0): 6
cat("Strongly downregulated (logFC ≤ -1.0):", n_down, "\n")
## Strongly downregulated (logFC ≤ -1.0): 4
cat("Total |logFC| ≥ 1.0:", n_total, "\n")
## Total |logFC| ≥ 1.0: 10
E vs RE
############################################################
# E vs RE
############################################################
df <- read.csv("./results_maxquant/hl428_all_galaxy.txt", sep = "\t")
df.prot <- df
df.prot <- df.prot[!df.prot$Reverse == "+", ]
df.prot <- df.prot[!df.prot$Potential.contaminant == "+", ]
df.prot <- df.prot[!df.prot$Only.identified.by.site == "+", ]
df.prot$Gene.names <- extract_gene_names(df.prot$Fasta.headers)
df.LFQ <- df.prot[, c(524:547, 572:586, 588:595)]
df.LFQ[df.LFQ == 0] <- NA
print(colnames(df.LFQ))
## [1] "LFQ.intensity.HL_428_E_1A_15_1.raw.thermo.raw"
## [2] "LFQ.intensity.HL_428_E_1A_15_2.raw.thermo.raw"
## [3] "LFQ.intensity.HL_428_E_1A_15_3.raw.thermo.raw"
## [4] "LFQ.intensity.HL_428_E_1B_15_1.raw.thermo.raw"
## [5] "LFQ.intensity.HL_428_E_1B_15_2.raw.thermo.raw"
## [6] "LFQ.intensity.HL_428_E_1B_15_3.raw.thermo.raw"
## [7] "LFQ.intensity.HL_428_E_1C_15_1.raw.thermo.raw"
## [8] "LFQ.intensity.HL_428_E_1C_15_2.raw.thermo.raw"
## [9] "LFQ.intensity.HL_428_E_1C_15_3.raw.thermo.raw"
## [10] "LFQ.intensity.HL_428_E1_A_01.raw.thermo.raw"
## [11] "LFQ.intensity.HL_428_E1_A_02.raw.thermo.raw"
## [12] "LFQ.intensity.HL_428_E1_A_03.raw.thermo.raw"
## [13] "LFQ.intensity.HL_428_E1_B_01.raw.thermo.raw"
## [14] "LFQ.intensity.HL_428_E1_B_02.raw.thermo.raw"
## [15] "LFQ.intensity.HL_428_E1_B_03.raw.thermo.raw"
## [16] "LFQ.intensity.HL_428_E2_A_01.raw.thermo.raw"
## [17] "LFQ.intensity.HL_428_E2_A_02.raw.thermo.raw"
## [18] "LFQ.intensity.HL_428_E2_A_03.raw.thermo.raw"
## [19] "LFQ.intensity.HL_428_E2_B_01.raw.thermo.raw"
## [20] "LFQ.intensity.HL_428_E2_B_02.raw.thermo.raw"
## [21] "LFQ.intensity.HL_428_E2_B_03.raw.thermo.raw"
## [22] "LFQ.intensity.HL_428_E2_C_01.raw.thermo.raw"
## [23] "LFQ.intensity.HL_428_E2_C_02.raw.thermo.raw"
## [24] "LFQ.intensity.HL_428_E2_C_03.raw.thermo.raw"
## [25] "LFQ.intensity.HL_428_RE_1A_15_1.raw.thermo.raw"
## [26] "LFQ.intensity.HL_428_RE_1A_15_2.raw.thermo.raw"
## [27] "LFQ.intensity.HL_428_RE_1A_15_3.raw.thermo.raw"
## [28] "LFQ.intensity.HL_428_RE_1B_15_1.raw.thermo.raw"
## [29] "LFQ.intensity.HL_428_RE_1B_15_2.raw.thermo.raw"
## [30] "LFQ.intensity.HL_428_RE_1B_15_3.raw.thermo.raw"
## [31] "LFQ.intensity.HL_428_RE_1C_15_1.raw.thermo.raw"
## [32] "LFQ.intensity.HL_428_RE_1C_15_2.raw.thermo.raw"
## [33] "LFQ.intensity.HL_428_RE_1C_15_3.raw.thermo.raw"
## [34] "LFQ.intensity.HL_428_RE1_A_01.raw.thermo.raw"
## [35] "LFQ.intensity.HL_428_RE1_A_02.raw.thermo.raw"
## [36] "LFQ.intensity.HL_428_RE1_A_03.raw.thermo.raw"
## [37] "LFQ.intensity.HL_428_RE1_B_01.raw.thermo.raw"
## [38] "LFQ.intensity.HL_428_RE1_B_02.raw.thermo.raw"
## [39] "LFQ.intensity.HL_428_RE1_B_03.raw.thermo.raw"
## [40] "LFQ.intensity.HL_428_RE2_A_02.raw.thermo.raw"
## [41] "LFQ.intensity.HL_428_RE2_A_03.raw.thermo.raw"
## [42] "LFQ.intensity.HL_428_RE2_B_01.raw.thermo.raw"
## [43] "LFQ.intensity.HL_428_RE2_B_02.raw.thermo.raw"
## [44] "LFQ.intensity.HL_428_RE2_B_03.raw.thermo.raw"
## [45] "LFQ.intensity.HL_428_RE2_C_01.raw.thermo.raw"
## [46] "LFQ.intensity.HL_428_RE2_C_02.raw.thermo.raw"
## [47] "LFQ.intensity.HL_428_RE2_C_03.raw.thermo.raw"
rownames(df.LFQ) <- df.prot$Majority.protein.IDs
df.LFQ <- t(df.LFQ)
keep_cols <- colSums(!is.na(df.LFQ)) > 0
removed_col_indices <- which(!keep_cols)
df.LFQ.clean <- df.LFQ[, colSums(!is.na(df.LFQ)) > 0]
df.LFQ.clean <- log2(df.LFQ.clean)
result <- llsImpute(as.matrix(df.LFQ.clean), k = 10, correlation = "pearson", allVariables = TRUE)
df.LFQ <- as.data.frame(completeObs(result))
df.LFQ <- as.data.frame(t(df.LFQ))
df.LFQ$valid_A <- apply(df.LFQ[, 1:24], 1, function(x) sum(!is.na(x)))
df.LFQ$valid_B <- apply(df.LFQ[, 25:47], 1, function(x) sum(!is.na(x)))
df.LFQ.filter <- df.LFQ[df.LFQ$valid_A >= 2 | df.LFQ$valid_B >= 2, 1:47]
print(colnames(df.prot[, c(131:154, 179:193, 195:202)]))
## [1] "Razor...unique.peptides.HL_428_E_1A_15_1.raw.thermo.raw"
## [2] "Razor...unique.peptides.HL_428_E_1A_15_2.raw.thermo.raw"
## [3] "Razor...unique.peptides.HL_428_E_1A_15_3.raw.thermo.raw"
## [4] "Razor...unique.peptides.HL_428_E_1B_15_1.raw.thermo.raw"
## [5] "Razor...unique.peptides.HL_428_E_1B_15_2.raw.thermo.raw"
## [6] "Razor...unique.peptides.HL_428_E_1B_15_3.raw.thermo.raw"
## [7] "Razor...unique.peptides.HL_428_E_1C_15_1.raw.thermo.raw"
## [8] "Razor...unique.peptides.HL_428_E_1C_15_2.raw.thermo.raw"
## [9] "Razor...unique.peptides.HL_428_E_1C_15_3.raw.thermo.raw"
## [10] "Razor...unique.peptides.HL_428_E1_A_01.raw.thermo.raw"
## [11] "Razor...unique.peptides.HL_428_E1_A_02.raw.thermo.raw"
## [12] "Razor...unique.peptides.HL_428_E1_A_03.raw.thermo.raw"
## [13] "Razor...unique.peptides.HL_428_E1_B_01.raw.thermo.raw"
## [14] "Razor...unique.peptides.HL_428_E1_B_02.raw.thermo.raw"
## [15] "Razor...unique.peptides.HL_428_E1_B_03.raw.thermo.raw"
## [16] "Razor...unique.peptides.HL_428_E2_A_01.raw.thermo.raw"
## [17] "Razor...unique.peptides.HL_428_E2_A_02.raw.thermo.raw"
## [18] "Razor...unique.peptides.HL_428_E2_A_03.raw.thermo.raw"
## [19] "Razor...unique.peptides.HL_428_E2_B_01.raw.thermo.raw"
## [20] "Razor...unique.peptides.HL_428_E2_B_02.raw.thermo.raw"
## [21] "Razor...unique.peptides.HL_428_E2_B_03.raw.thermo.raw"
## [22] "Razor...unique.peptides.HL_428_E2_C_01.raw.thermo.raw"
## [23] "Razor...unique.peptides.HL_428_E2_C_02.raw.thermo.raw"
## [24] "Razor...unique.peptides.HL_428_E2_C_03.raw.thermo.raw"
## [25] "Razor...unique.peptides.HL_428_RE_1A_15_1.raw.thermo.raw"
## [26] "Razor...unique.peptides.HL_428_RE_1A_15_2.raw.thermo.raw"
## [27] "Razor...unique.peptides.HL_428_RE_1A_15_3.raw.thermo.raw"
## [28] "Razor...unique.peptides.HL_428_RE_1B_15_1.raw.thermo.raw"
## [29] "Razor...unique.peptides.HL_428_RE_1B_15_2.raw.thermo.raw"
## [30] "Razor...unique.peptides.HL_428_RE_1B_15_3.raw.thermo.raw"
## [31] "Razor...unique.peptides.HL_428_RE_1C_15_1.raw.thermo.raw"
## [32] "Razor...unique.peptides.HL_428_RE_1C_15_2.raw.thermo.raw"
## [33] "Razor...unique.peptides.HL_428_RE_1C_15_3.raw.thermo.raw"
## [34] "Razor...unique.peptides.HL_428_RE1_A_01.raw.thermo.raw"
## [35] "Razor...unique.peptides.HL_428_RE1_A_02.raw.thermo.raw"
## [36] "Razor...unique.peptides.HL_428_RE1_A_03.raw.thermo.raw"
## [37] "Razor...unique.peptides.HL_428_RE1_B_01.raw.thermo.raw"
## [38] "Razor...unique.peptides.HL_428_RE1_B_02.raw.thermo.raw"
## [39] "Razor...unique.peptides.HL_428_RE1_B_03.raw.thermo.raw"
## [40] "Razor...unique.peptides.HL_428_RE2_A_02.raw.thermo.raw"
## [41] "Razor...unique.peptides.HL_428_RE2_A_03.raw.thermo.raw"
## [42] "Razor...unique.peptides.HL_428_RE2_B_01.raw.thermo.raw"
## [43] "Razor...unique.peptides.HL_428_RE2_B_02.raw.thermo.raw"
## [44] "Razor...unique.peptides.HL_428_RE2_B_03.raw.thermo.raw"
## [45] "Razor...unique.peptides.HL_428_RE2_C_01.raw.thermo.raw"
## [46] "Razor...unique.peptides.HL_428_RE2_C_02.raw.thermo.raw"
## [47] "Razor...unique.peptides.HL_428_RE2_C_03.raw.thermo.raw"
pep.count.table <- data.frame(
count = rowMins(as.matrix(df.prot[-removed_col_indices, c(131:154, 179:193, 195:202)])),
row.names = df.prot$Majority.protein.IDs[-removed_col_indices]
)
pep.count.table$count <- pep.count.table$count + 1
protein.matrix <- as.matrix(df.LFQ.filter)
class <- factor(c(rep("E", 24), rep("RE", 23)))
replicate <- factor(c(rep("rep1", 12), rep("rep2", 12),
rep("rep1", 12), rep("rep2", 11)))
design <- model.matrix(~ 0 + class + replicate)
colnames(design)[1:2] <- c("classE", "classRE")
fit_result <- run_deqms_pipeline(
protein.matrix, design, pep.count.table,
contrast_string = "classE-classRE"
)
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : pseudoinverse used at -0.028219
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : neighborhood radius 1.6132
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : There are other near singularities as well. 2.5121

DEqMS.results <- outputResult(fit_result, coef_col = 1)
rownames(df.prot) <- df.prot$Majority.protein.IDs
DEqMS.results$Gene.name <- df.prot[DEqMS.results$gene, ]$Gene.names
timestamp <- format(Sys.time(), "%Y%m%d_%H%M%S")
write.csv(DEqMS.results,
file = paste0("/proj/proteomics/7_combinedHL/evaluation_paper/abc_2025/raw_log2FCdata/hl428_all/data_", timestamp, ".csv"))
processed_df <- DEqMS.results %>%
dplyr::filter(!is.na(logFC)) %>%
dplyr::mutate(across(where(is.numeric), ~ round(.x, 3)))
datatable(processed_df, rownames = FALSE)
processed_df_interest <- processed_df %>%
dplyr::filter(Gene.name %in% proteins_of_interest)
datatable(processed_df_interest, rownames = FALSE, caption = "proteins of interest")
vp_lfc_limit <- 5
plot_volcano(DEqMS.results, "E vs RE", vp_lfc_limit)

sig <- DEqMS.results %>% dplyr::filter(sca.P.Value <= 0.05)
n_up <- sum(sig$logFC >= 1.0, na.rm = TRUE)
n_down <- sum(sig$logFC <= -1.0, na.rm = TRUE)
n_total <- sum(abs(sig$logFC) >= 1.0, na.rm = TRUE)
cat("Strongly upregulated (logFC ≥ 1.0):", n_up, "\n")
## Strongly upregulated (logFC ≥ 1.0): 49
cat("Strongly downregulated (logFC ≤ -1.0):", n_down, "\n")
## Strongly downregulated (logFC ≤ -1.0): 20
cat("Total |logFC| ≥ 1.0:", n_total, "\n")
## Total |logFC| ≥ 1.0: 69
R vs RE
############################################################
# R vs RE
############################################################
df <- read.csv("./results_maxquant/hl428_all_galaxy.txt", sep = "\t")
df.prot <- df
df.prot <- df.prot[!df.prot$Reverse == "+", ]
df.prot <- df.prot[!df.prot$Potential.contaminant == "+", ]
df.prot <- df.prot[!df.prot$Only.identified.by.site == "+", ]
df.prot$Gene.names <- extract_gene_names(df.prot$Fasta.headers)
df.LFQ <- df.prot[, c(548:571, 572:586, 588:595)]
df.LFQ[df.LFQ == 0] <- NA
print(colnames(df.LFQ))
## [1] "LFQ.intensity.HL_428_R_1A_15_1.raw.thermo.raw"
## [2] "LFQ.intensity.HL_428_R_1A_15_2.raw.thermo.raw"
## [3] "LFQ.intensity.HL_428_R_1A_15_3.raw.thermo.raw"
## [4] "LFQ.intensity.HL_428_R_1B_15_1.raw.thermo.raw"
## [5] "LFQ.intensity.HL_428_R_1B_15_2.raw.thermo.raw"
## [6] "LFQ.intensity.HL_428_R_1B_15_3.raw.thermo.raw"
## [7] "LFQ.intensity.HL_428_R_1C_15_1.raw.thermo.raw"
## [8] "LFQ.intensity.HL_428_R_1C_15_2.raw.thermo.raw"
## [9] "LFQ.intensity.HL_428_R_1C_15_3.raw.thermo.raw"
## [10] "LFQ.intensity.HL_428_R1_A_01.raw.thermo.raw"
## [11] "LFQ.intensity.HL_428_R1_A_02.raw.thermo.raw"
## [12] "LFQ.intensity.HL_428_R1_A_03.raw.thermo.raw"
## [13] "LFQ.intensity.HL_428_R1_B_01.raw.thermo.raw"
## [14] "LFQ.intensity.HL_428_R1_B_02.raw.thermo.raw"
## [15] "LFQ.intensity.HL_428_R1_B_03.raw.thermo.raw"
## [16] "LFQ.intensity.HL_428_R2_A_01.raw.thermo.raw"
## [17] "LFQ.intensity.HL_428_R2_A_02.raw.thermo.raw"
## [18] "LFQ.intensity.HL_428_R2_A_03.raw.thermo.raw"
## [19] "LFQ.intensity.HL_428_R2_B_01.raw.thermo.raw"
## [20] "LFQ.intensity.HL_428_R2_B_02.raw.thermo.raw"
## [21] "LFQ.intensity.HL_428_R2_B_03.raw.thermo.raw"
## [22] "LFQ.intensity.HL_428_R2_C_01.raw.thermo.raw"
## [23] "LFQ.intensity.HL_428_R2_C_02.raw.thermo.raw"
## [24] "LFQ.intensity.HL_428_R2_C_03.raw.thermo.raw"
## [25] "LFQ.intensity.HL_428_RE_1A_15_1.raw.thermo.raw"
## [26] "LFQ.intensity.HL_428_RE_1A_15_2.raw.thermo.raw"
## [27] "LFQ.intensity.HL_428_RE_1A_15_3.raw.thermo.raw"
## [28] "LFQ.intensity.HL_428_RE_1B_15_1.raw.thermo.raw"
## [29] "LFQ.intensity.HL_428_RE_1B_15_2.raw.thermo.raw"
## [30] "LFQ.intensity.HL_428_RE_1B_15_3.raw.thermo.raw"
## [31] "LFQ.intensity.HL_428_RE_1C_15_1.raw.thermo.raw"
## [32] "LFQ.intensity.HL_428_RE_1C_15_2.raw.thermo.raw"
## [33] "LFQ.intensity.HL_428_RE_1C_15_3.raw.thermo.raw"
## [34] "LFQ.intensity.HL_428_RE1_A_01.raw.thermo.raw"
## [35] "LFQ.intensity.HL_428_RE1_A_02.raw.thermo.raw"
## [36] "LFQ.intensity.HL_428_RE1_A_03.raw.thermo.raw"
## [37] "LFQ.intensity.HL_428_RE1_B_01.raw.thermo.raw"
## [38] "LFQ.intensity.HL_428_RE1_B_02.raw.thermo.raw"
## [39] "LFQ.intensity.HL_428_RE1_B_03.raw.thermo.raw"
## [40] "LFQ.intensity.HL_428_RE2_A_02.raw.thermo.raw"
## [41] "LFQ.intensity.HL_428_RE2_A_03.raw.thermo.raw"
## [42] "LFQ.intensity.HL_428_RE2_B_01.raw.thermo.raw"
## [43] "LFQ.intensity.HL_428_RE2_B_02.raw.thermo.raw"
## [44] "LFQ.intensity.HL_428_RE2_B_03.raw.thermo.raw"
## [45] "LFQ.intensity.HL_428_RE2_C_01.raw.thermo.raw"
## [46] "LFQ.intensity.HL_428_RE2_C_02.raw.thermo.raw"
## [47] "LFQ.intensity.HL_428_RE2_C_03.raw.thermo.raw"
rownames(df.LFQ) <- df.prot$Majority.protein.IDs
df.LFQ <- t(df.LFQ)
keep_cols <- colSums(!is.na(df.LFQ)) > 0
removed_col_indices <- which(!keep_cols)
df.LFQ.clean <- df.LFQ[, colSums(!is.na(df.LFQ)) > 0]
df.LFQ.clean <- log2(df.LFQ.clean)
result <- llsImpute(as.matrix(df.LFQ.clean), k = 10, correlation = "pearson", allVariables = TRUE)
df.LFQ <- as.data.frame(completeObs(result))
df.LFQ <- as.data.frame(t(df.LFQ))
df.LFQ$valid_A <- apply(df.LFQ[, 1:24], 1, function(x) sum(!is.na(x)))
df.LFQ$valid_B <- apply(df.LFQ[, 25:47], 1, function(x) sum(!is.na(x)))
df.LFQ.filter <- df.LFQ[df.LFQ$valid_A >= 2 | df.LFQ$valid_B >= 2, 1:47]
print(colnames(df.prot[, c(155:178, 179:193, 195:202)]))
## [1] "Razor...unique.peptides.HL_428_R_1A_15_1.raw.thermo.raw"
## [2] "Razor...unique.peptides.HL_428_R_1A_15_2.raw.thermo.raw"
## [3] "Razor...unique.peptides.HL_428_R_1A_15_3.raw.thermo.raw"
## [4] "Razor...unique.peptides.HL_428_R_1B_15_1.raw.thermo.raw"
## [5] "Razor...unique.peptides.HL_428_R_1B_15_2.raw.thermo.raw"
## [6] "Razor...unique.peptides.HL_428_R_1B_15_3.raw.thermo.raw"
## [7] "Razor...unique.peptides.HL_428_R_1C_15_1.raw.thermo.raw"
## [8] "Razor...unique.peptides.HL_428_R_1C_15_2.raw.thermo.raw"
## [9] "Razor...unique.peptides.HL_428_R_1C_15_3.raw.thermo.raw"
## [10] "Razor...unique.peptides.HL_428_R1_A_01.raw.thermo.raw"
## [11] "Razor...unique.peptides.HL_428_R1_A_02.raw.thermo.raw"
## [12] "Razor...unique.peptides.HL_428_R1_A_03.raw.thermo.raw"
## [13] "Razor...unique.peptides.HL_428_R1_B_01.raw.thermo.raw"
## [14] "Razor...unique.peptides.HL_428_R1_B_02.raw.thermo.raw"
## [15] "Razor...unique.peptides.HL_428_R1_B_03.raw.thermo.raw"
## [16] "Razor...unique.peptides.HL_428_R2_A_01.raw.thermo.raw"
## [17] "Razor...unique.peptides.HL_428_R2_A_02.raw.thermo.raw"
## [18] "Razor...unique.peptides.HL_428_R2_A_03.raw.thermo.raw"
## [19] "Razor...unique.peptides.HL_428_R2_B_01.raw.thermo.raw"
## [20] "Razor...unique.peptides.HL_428_R2_B_02.raw.thermo.raw"
## [21] "Razor...unique.peptides.HL_428_R2_B_03.raw.thermo.raw"
## [22] "Razor...unique.peptides.HL_428_R2_C_01.raw.thermo.raw"
## [23] "Razor...unique.peptides.HL_428_R2_C_02.raw.thermo.raw"
## [24] "Razor...unique.peptides.HL_428_R2_C_03.raw.thermo.raw"
## [25] "Razor...unique.peptides.HL_428_RE_1A_15_1.raw.thermo.raw"
## [26] "Razor...unique.peptides.HL_428_RE_1A_15_2.raw.thermo.raw"
## [27] "Razor...unique.peptides.HL_428_RE_1A_15_3.raw.thermo.raw"
## [28] "Razor...unique.peptides.HL_428_RE_1B_15_1.raw.thermo.raw"
## [29] "Razor...unique.peptides.HL_428_RE_1B_15_2.raw.thermo.raw"
## [30] "Razor...unique.peptides.HL_428_RE_1B_15_3.raw.thermo.raw"
## [31] "Razor...unique.peptides.HL_428_RE_1C_15_1.raw.thermo.raw"
## [32] "Razor...unique.peptides.HL_428_RE_1C_15_2.raw.thermo.raw"
## [33] "Razor...unique.peptides.HL_428_RE_1C_15_3.raw.thermo.raw"
## [34] "Razor...unique.peptides.HL_428_RE1_A_01.raw.thermo.raw"
## [35] "Razor...unique.peptides.HL_428_RE1_A_02.raw.thermo.raw"
## [36] "Razor...unique.peptides.HL_428_RE1_A_03.raw.thermo.raw"
## [37] "Razor...unique.peptides.HL_428_RE1_B_01.raw.thermo.raw"
## [38] "Razor...unique.peptides.HL_428_RE1_B_02.raw.thermo.raw"
## [39] "Razor...unique.peptides.HL_428_RE1_B_03.raw.thermo.raw"
## [40] "Razor...unique.peptides.HL_428_RE2_A_02.raw.thermo.raw"
## [41] "Razor...unique.peptides.HL_428_RE2_A_03.raw.thermo.raw"
## [42] "Razor...unique.peptides.HL_428_RE2_B_01.raw.thermo.raw"
## [43] "Razor...unique.peptides.HL_428_RE2_B_02.raw.thermo.raw"
## [44] "Razor...unique.peptides.HL_428_RE2_B_03.raw.thermo.raw"
## [45] "Razor...unique.peptides.HL_428_RE2_C_01.raw.thermo.raw"
## [46] "Razor...unique.peptides.HL_428_RE2_C_02.raw.thermo.raw"
## [47] "Razor...unique.peptides.HL_428_RE2_C_03.raw.thermo.raw"
pep.count.table <- data.frame(
count = rowMins(as.matrix(df.prot[-removed_col_indices, c(155:178, 179:193, 195:202)])),
row.names = df.prot$Majority.protein.IDs[-removed_col_indices]
)
pep.count.table$count <- pep.count.table$count + 1
protein.matrix <- as.matrix(df.LFQ.filter)
class <- factor(c(rep("R", 24), rep("RE", 23)))
replicate <- factor(c(rep("rep1", 12), rep("rep2", 12),
rep("rep1", 12), rep("rep2", 11)))
design <- model.matrix(~ 0 + class + replicate)
colnames(design)[1:2] <- c("classR", "classRE")
fit_result <- run_deqms_pipeline(
protein.matrix, design, pep.count.table,
contrast_string = "classR-classRE"
)
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : pseudoinverse used at -0.02864
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : neighborhood radius 1.6136
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : reciprocal condition number 2.8781e-16
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : There are other near singularities as well. 2.5121

DEqMS.results <- outputResult(fit_result, coef_col = 1)
rownames(df.prot) <- df.prot$Majority.protein.IDs
DEqMS.results$Gene.name <- df.prot[DEqMS.results$gene, ]$Gene.names
timestamp <- format(Sys.time(), "%Y%m%d_%H%M%S")
write.csv(DEqMS.results,
file = paste0("/proj/proteomics/7_combinedHL/evaluation_paper/abc_2025/raw_log2FCdata/hl428_all/data_", timestamp, ".csv"))
processed_df <- DEqMS.results %>%
dplyr::filter(!is.na(logFC)) %>%
dplyr::mutate(across(where(is.numeric), ~ round(.x, 3)))
datatable(processed_df, rownames = FALSE)
processed_df_interest <- processed_df %>%
dplyr::filter(Gene.name %in% proteins_of_interest)
datatable(processed_df_interest, rownames = FALSE, caption = "proteins of interest")
vp_lfc_limit <- 5
plot_volcano(DEqMS.results, "R vs RE", vp_lfc_limit)

sig <- DEqMS.results %>% dplyr::filter(sca.P.Value <= 0.05)
n_up <- sum(sig$logFC >= 1.0, na.rm = TRUE)
n_down <- sum(sig$logFC <= -1.0, na.rm = TRUE)
n_total <- sum(abs(sig$logFC) >= 1.0, na.rm = TRUE)
cat("Strongly upregulated (logFC ≥ 1.0):", n_up, "\n")
## Strongly upregulated (logFC ≥ 1.0): 76
cat("Strongly downregulated (logFC ≤ -1.0):", n_down, "\n")
## Strongly downregulated (logFC ≤ -1.0): 28
cat("Total |logFC| ≥ 1.0:", n_total, "\n")
## Total |logFC| ≥ 1.0: 104